# Attach the packages used below before their first use: dplyr supplies
# `%>%`, filter(), mutate(), group_by() and summarise(); ggplot2 supplies
# the plotting functions. Re-attaching an already loaded package is harmless.
library(dplyr)
library(ggplot2)

# Read the data set from the working directory and preview the first 5 rows
titanic <- read.csv("Titanic.csv")
head(titanic, 5)
##   Class Survived                                            Name    Sex     Age
## 1     1        1                   Allen, Miss. Elisabeth Walton female 29.0000
## 2     1        1                  Allison, Master. Hudson Trevor   male  0.9167
## 3     1        0                    Allison, Miss. Helen Loraine female  2.0000
## 4     1        0            Allison, Mr. Hudson Joshua Creighton   male 30.0000
## 5     1        0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000
##        Dept SibSp ParCh Ticket     Fare   Cabin Embarked Job
## 1 passenger     0     0  24160 211.3375      B5        S    
## 2 passenger     1     2 113781   151.55 C22 C26        S    
## 3 passenger     1     2 113781   151.55 C22 C26        S    
## 4 passenger     1     2 113781   151.55 C22 C26        S    
## 5 passenger     1     2 113781   151.55 C22 C26        S
# Keep only the rows where the survival outcome is recorded
titanic <- titanic %>%
  filter(!is.na(Survived))

# Histogram of Age using 5-year-wide bins
ggplot(titanic, aes(x = Age)) +
  geom_histogram(binwidth = 5, fill = "skyblue", color = "black") +
  labs(title = "Age Distribution of Passengers", x = "Age", y = "Count")
## Warning: Removed 34 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Bucket Age into four bands and chart the mean of Survived within each band.
# cut() builds right-closed intervals by default: (0, 18], (18, 30],
# (30, 50] and (50, Inf).
titanic %>%
  mutate(
    Age_Group = cut(
      Age,
      breaks = c(0, 18, 30, 50, Inf),
      labels = c("0-18", "19-30", "31-50", "51+")
    )
  ) %>%
  group_by(Age_Group) %>%
  summarise(Survival_Rate = mean(Survived)) %>%
  ggplot(aes(x = Age_Group, y = Survival_Rate)) +
  geom_col(fill = "skyblue", color = "black") +
  labs(
    title = "Survival Rate by Age Group",
    x = "Age Group",
    y = "Survival Rate"
  )

# Count of rows per Survived value; factor() makes the 0/1 codes discrete
titanic %>%
  ggplot(aes(x = factor(Survived))) +
  geom_bar() +
  labs(y = "Count", x = "Survived")

# Mean of Survived within each Sex group (one row per Sex)
survival_by_sex <- summarise(
  group_by(titanic, Sex),
  survival_rate = mean(Survived)
)

# One bar per Sex, bar height = survival rate
ggplot(survival_by_sex, aes(x = Sex, y = survival_rate)) +
  geom_bar(stat = "identity") +
  labs(y = "Survival Rate", x = "Sex")

# Mean of Survived within each Class group (one row per Class)
survival_by_class <- summarise(
  group_by(titanic, Class),
  survival_rate = mean(Survived)
)

# One bar per Class; factor() keeps the class codes as discrete categories
ggplot(survival_by_class, aes(x = factor(Class), y = survival_rate)) +
  geom_bar(stat = "identity") +
  labs(y = "Survival Rate", x = "Class")

# Survival rate per Sex (the same summary the script computes earlier)
survival_by_sex <- summarise(
  group_by(titanic, Sex),
  survival_rate = mean(Survived)
)

# Titled, sky-blue version of the survival-rate-by-Sex bar chart
ggplot(survival_by_sex, aes(x = Sex, y = survival_rate)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Survival Rate by Sex", x = "Sex", y = "Survival Rate")

# Stacked counts per Class, filled by Survived, with one panel per Sex
titanic %>%
  ggplot(aes(x = factor(Class), fill = factor(Survived))) +
  geom_bar() +
  facet_wrap(~Sex) +
  labs(
    title = "Survival Count by Class and Sex",
    x = "Class",
    y = "Count",
    fill = "Survived"
  )

# Group by Age and calculate the survival rate for each distinct age.
# Rows with a missing Age form their own NA group, which geom_line() then
# drops with a warning.
survival_by_age <- titanic %>%
  group_by(Age) %>%
  summarise(survival_rate = mean(Survived))

# Line plot of survival rate by Age
ggplot(data = survival_by_age, aes(x = Age, y = survival_rate)) +
  geom_line() +
  labs(x = "Age", y = "Survival Rate")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

# Load necessary libraries
library(plotly)

# Scatter plot of Fare vs. Age colored by Survived.
# The head() preview prints Fare with inconsistent decimals (211.3375 next to
# 151.55), which indicates the column was not read as numeric. Coerce it for
# this plot so the y axis is continuous; going through as.character() also
# covers a factor column, and the conversion is a no-op for numeric input.
# Entries that cannot be parsed become NA and are dropped by geom_point().
interact <- titanic %>%
  mutate(Fare = as.numeric(as.character(Fare))) %>%
  ggplot(aes(x = Age, y = Fare, color = factor(Survived))) +
  geom_point() +
  labs(x = "Age", y = "Fare", color = "Survived") +
  ggtitle("Fare vs. Age Colored by Survival")

# Convert the static ggplot into an interactive plotly widget
interactive <- ggplotly(interact)

# Show interactive plot
interactive
# Calculate survival rate for every Sex / Class / Age combination.
# `.groups = "drop"` returns an ungrouped result and avoids the
# "summarise() has grouped output by ..." message.
survival_by_group <- titanic %>%
  group_by(Sex, Class, Age) %>%
  summarise(survival_rate = mean(Survived), .groups = "drop")

# Build the bar chart: one bar per Sex.Class.Age combination.
# theme_minimal() is a complete theme and replaces any theme settings added
# before it, so it must come BEFORE the axis-text rotation, not after.
plotly_bar_plot <- ggplot(
  data = survival_by_group,
  aes(x = interaction(Sex, Class, Age), y = survival_rate)
) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(x = "Sex, Class, Age", y = "Survival Rate") +
  ggtitle("Survival Rate by Sex, Class, and Age") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Convert to an interactive plotly widget and display it
plotly_bar_plot <- ggplotly(plotly_bar_plot)
plotly_bar_plot